# Clears work environment
rm(list = ls())

# Sets working directory
setwd('C:/Users/Jason/Box Sync/Home Folder jdt34/733 - Maximum Likelihood Estimation/Lai & Slater 2006 replication')

# Loads data and libraries
load(file='cleanedLSdata.RData')
load(file='newdata.RData')
loadPkg(packs)

# Applies pseudo-separation plot to new count data
sepcolor = c('#FFFFFF', '#00BFC4', '#0BCA88', '#16D14C', '#32D723', '#7FDE30', '#C7E43E', '#F19D5C', '#F8766D')
extra = '#EBCC4D'
ndsort = newdata[with(newdata, order(majorinitone)), ]
rownames(ndsort) = NULL
ndsort$index = seq(1, (nrow(newdata) * 5), 5)
ndsort$min = 0
ndsort$max = 8
OSrealcount = ggplot(data=ndsort[which(ndsort$majorinitone>=1),], aes(x=index, y=majorinitone, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_linerange(size=3.5) +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor[2:9]) +
  scale_x_continuous(limits=c((length(which(ndsort$majorinitone<1)) * 5), (nrow(newdata) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 8, 1)) +
  guides(color=F) +
  labs(title='Real data: MIDs at least 1') +
  theme(panel.background = element_rect(fill='grey95'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
OSrealzero = ggplot(data=ndsort[which(ndsort$majorinitone<1),], aes(x=index, y=majorinitone, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (length(which(ndsort$majorinitone<1)) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 8, 1)) +
  guides(color=F) +
  labs(title='Real data: MIDs less than 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())

# Applies pseudo-separation plot to new data with negative binomial model
nbnewdatasort = newdata[with(newdata, order(nb)), ]
rownames(nbnewdatasort) = NULL
nbnewdatasort$index = seq(1, (nrow(newdata) * 5), 5)
nbnewdatasort$min = 0
nbnewdatasort$max = 8
OSnbsepplotcount = ggplot(data=nbnewdatasort[which(nbnewdatasort$nb>=1),], aes(x=index, y=nb, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_linerange(size=10) +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c((length(which(nbnewdatasort$nb<1)) * 5), (nrow(newdata) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 8, 1)) +
  guides(color=F) +
  labs(title='Negative binomial model: predicted MIDs at least 1') +
  theme(panel.background = element_rect(fill='grey95'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
OSnbsepplotzero = ggplot(data=nbnewdatasort[which(nbnewdatasort$nb<1),], aes(x=index, y=nb, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (length(which(nbnewdatasort$nb<1)) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 8, 1)) +
  guides(color=F) +
  labs(title='Negative binomial model: predicted MIDs less than 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())

# Applies pseudo-separation plot to new data with zero-inflated negative binomial model
zinewdatasort = newdata[with(newdata, order(zi)), ]
rownames(zinewdatasort) = NULL
zinewdatasort$index = seq(1, (nrow(newdata) * 5), 5)
zinewdatasort$min = 0
zinewdatasort$max = 8
OSzisepplotcount = ggplot(data=zinewdatasort[which(zinewdatasort$zi>=1),], aes(x=index, y=zi, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_linerange(size=10) +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c((length(which(zinewdatasort$zi<1)) * 5), (nrow(newdata) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 8, 1)) +
  guides(color=F) +
  labs(title='Zero-inflated negative binomial model: predicted MIDs at least 1') +
  theme(panel.background = element_rect(fill='grey95'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())
OSzisepplotzero = ggplot(data=zinewdatasort[which(zinewdatasort$zi<1),], aes(x=index, y=zi, ymin=min, ymax=max, color=factor(majorinitone))) +
  geom_hline(y=1, color='black', alpha=.25) +
  geom_hline(y=2, color='black', alpha=.25) +
  geom_hline(y=3, color='black', alpha=.25) +
  geom_hline(y=4, color='black', alpha=.25) +
  geom_hline(y=5, color='black', alpha=.25) +
  geom_hline(y=6, color='black', alpha=.25) +
  geom_hline(y=7, color='black', alpha=.25) +
  geom_hline(y=8, color='black', alpha=.25) +
  geom_linerange() +
  geom_line(color='black') +
  scale_color_manual(values=sepcolor) +
  scale_x_continuous(limits=c(0, (length(which(zinewdatasort$zi<1)) * 5)), expand=c(0,0)) +
  scale_y_continuous(breaks=seq(0, 8, 1)) +
  guides(color=F) +
  labs(title='Zero-inflated negative binomial model: predicted MIDs less than 1') +
  theme(panel.background = element_rect(fill='white'),
        panel.grid.minor.x=element_blank(), 
        panel.grid.minor.y=element_blank(),
        panel.grid.major.x=element_blank(), 
        panel.grid.major.y=element_line(color='darkgrey'), 
        axis.text.x=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        axis.ticks.x=element_blank())

OSsepplotGrid = arrangeGrob(OSrealzero, OSnbsepplotzero, OSzisepplotzero, OSrealcount, OSnbsepplotcount, OSzisepplotcount, ncol=1)
ggsave(file='OSsepplotGrid3.pdf', OSsepplotGrid, width=9, height=12, units='in')

save(file='panel7_out-of-sample_pseudo-separation_plots.RData', sepcolor, ndsort, nbnewdatasort, 
     zinewdatasort, OSrealzero, OSnbsepplotzero, OSzisepplotzero, OSrealcount, OSnbsepplotcount, 
     OSzisepplotcount, OSsepplotGrid)